home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Aminet 16
/
Aminet 16 (1996)(GTI - Schatztruhe)[!][Dec 1996].iso
/
Aminet
/
util
/
misc
/
cookietool.lha
/
cookietool
/
cookietool.c
< prev
next >
Wrap
C/C++ Source or Header
|
1996-10-04
|
12KB
|
456 lines
; /*
gcc cookietool.c -O -noixemul -o cookietool
quit 0 ; */
/*************************************************************************\
cookietool: remove duplicate entries from a cookie file
Options allow sorting the output alphabetically, and reformatting the
cookies to a given line length. The expected file format is plain text
with a "%%" line ending each cookie.
Usage:
cookietool [options] infile outfile
options: meaning:
-f reformat, killing extra blanks
-f<len> reformat to max. <len> characters per line
-b cut two-liners in half when reformatting
-s0 sort output alphabetically
-s1 " by first word, i. e. ignoring leading spaces and such
-s2 " by first word *on the last line*
-s3 " by last word
-S[0-3] case sensitive sorting
-x extended search, removes "almost identical"
items, too
Limitations:
Input and output file must not be the same.
Options -x and -s/-S are mutually exclusive.
\*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Version identification string.  It is not referenced anywhere else in
   this file; presumably it exists so the "$VER:" tag can be found in the
   binary -- TODO confirm. */
char version[] = "$VER: cookietool 1.3 (03.10.96)";
/* Capacity of <clist> in entries.  main() allocates this many entries and
   read_cookies() enlarges it by half whenever the list fills up. */
long listsize = 1000; /* will be adjusted dynamically */
/* Number of cookies currently stored in <clist>. */
long listed = 0;
/* Option flags set by main(): extended = -x given, beauty = -b given,
   case_sense = -S (rather than -s) given. */
int extended = 0, beauty = 0, case_sense = 0;
/* finalsort: 0 = keep input order, otherwise 1 + the digit given to
   -s/-S (2 when no digit was given).
   formatlen: 0 = no reformatting, -1 = reformat line by line (-f),
   >0 = reformat whole cookies to this width (-f<len>). */
int finalsort = 0, formatlen = 0;
/* One stored cookie. */
struct cookie {
char *text; /* malloc()ed copy of the cookie, made by read_cookies() */
char *sorthook; /* points into <text>; the by-name sort compares from here */
long number; /* ordinal of the cookie in the input, counting empty ones */
};
/* Dynamically allocated array of <listsize> entries, <listed> of them used. */
struct cookie *clist;
#define CBUFSIZE 20000
#define LBUFSIZE 2000
char cbuf[CBUFSIZE]; /* large enough to hold one complete cookie */
char line[LBUFSIZE]; /* large enough to hold the longest line */
void help(char *s)
/* Print the usage summary to stdout.  If <s> is not NULL it is reported
   first as an illegal option. */
{
    /* The usage text, one output line per entry. */
    static const char *const usage[] = {
        "usage: cookietool [options] <inputfile> <outputfile> \n",
        "where options are:\n",
        " -x extended search for 'almost identical' cookies, too\n",
        " -s[0|1|2|3] sort output, looking at -s0: first character,\n",
        " -s1: first alphanumeric, -s2: first alnum. on last line,\n",
        " -s3: last word in cookie. (-s: same as -s1)\n",
        " -S[0|1|2|3] case sensitive sorting\n",
        " -f<width> reformat cookies to new text width (please, don't)\n",
        " -b make 'beautiful' line breaks for two-liners\n",
        " -f reformat, without changing line breaks\n"
    };
    size_t k;

    if (s != NULL)
        printf("illegal option '%s'\n", s);
    for (k = 0; k < sizeof usage / sizeof usage[0]; k++)
        fputs(usage[k], stdout);
}
int str_cmp(char *s, char *t)
/* Variation of strcmp(): compares <s> and <t> case sensitively when the
   global <case_sense> is set, otherwise ignoring case.
   Returns 0 for equal strings, a negative value if <s> sorts before <t>
   and a positive value if it sorts after.
   Characters are compared as unsigned char, like strcmp() does, so the
   result does not depend on whether plain char is signed, and toupper()
   is never handed a negative value (which would be undefined). */
{
    const unsigned char *a = (const unsigned char *)s;
    const unsigned char *b = (const unsigned char *)t;

    if (case_sense) {
        while (*a == *b) {
            if (*a == '\0')
                return 0;
            a++; b++;
        }
        return *a - *b;
    }
    for (;;) {
        int ca = toupper(*a);
        int cb = toupper(*b);

        if (ca != cb)
            return ca - cb;
        if (*a == '\0')   /* both strings ended together */
            return 0;
        a++; b++;
    }
}
void compress(char *s)
/* Convert <s> in place to uppercase and remove extra blanks and
   punctuation: every run of alphanumerics is kept (uppercased), every run
   of other characters between two words becomes ONE blank, and there is no
   leading or trailing blank.  The result is never longer than the input. */
{
    const char *in = s;   /* read position */
    char *out = s;        /* write position, never ahead of <in> */
    int onword = 0;       /* was the previous character alphanumeric? */

    for (; *in; in++) {
        /* <ctype.h> functions require an unsigned char value */
        unsigned char c = (unsigned char)*in;

        if (isalnum(c)) {
            *out++ = (char)toupper(c);
            onword = 1;
        } else {
            if (onword)
                *out++ = ' ';   /* ONE blank after each word */
            onword = 0;
        }
    }
    if (!onword && out != s)
        out--;                  /* remove the last trailing blank */
    *out = '\0';
}
int reformat(char *s, int linelen)
/* Remove extra blanks from <s> and rearrange its line breaks, in place.
   Pass 1 turns every run of whitespace into one blank (two after a
   punctuation sign that was followed by more than one whitespace
   character) and ends the text with a single LF.
   Pass 2 turns blanks into line breaks so that lines stay within <linelen>
   characters where a blank is available.
   If exactly two lines result and the global <beauty> is set, the break is
   moved to the whitespace nearest the middle of the text.
   Returns the number of lines counted, or 0 if <s> held no visible text.
   Note: if <s> does not end in whitespace, the appended LF makes the
   result one character longer than the input.
   All <ctype.h> arguments are cast to unsigned char (a negative char is
   undefined there), and break positions are kept as indices with -1 as
   "none yet", so that no pointer before the start of <s> is ever formed. */
{
    char *home = s;
    char *in = s, *out = s;
    int onword = 0;
    int lines;
    long lastspace = -1;   /* index of the latest usable blank */
    long lastbreak = -1;   /* index of the latest line break */
    long i;

    /* pass 1: squeeze whitespace */
    while (*in) {
        if (isspace((unsigned char)*in)) {
            if (onword) {
                *out++ = ' ';   /* ONE blank after each word */
                if (ispunct((unsigned char)in[-1]) &&
                    isspace((unsigned char)in[1]))
                    *out++ = ' ';   /* allow a second space after a punctuation sign */
            }
            onword = 0;
        } else {
            *out++ = *in;
            onword = 1;
        }
        in++;
    }
    if (out == home) {
        *out = '\0';
        return 0;               /* empty cookie */
    }
    if (!onword)
        out--;                  /* remove the last trailing blank */
    *out++ = '\n';              /* but add a LF */
    *out = '\0';

    /* pass 2: find out where to put line breaks */
    lines = 1;
    for (i = 0; home[i]; i++) {
        if (!isspace((unsigned char)home[i]))
            continue;
        if (isspace((unsigned char)home[i + 1]))
            i++;                /* two spaces */
        if (i - lastbreak > linelen) {   /* line is getting too long */
            if (lastbreak == lastspace)
                lastspace = i;  /* one single huge word on this line */
            home[lastspace] = '\n';
            lastbreak = lastspace;
            lines++;
        } else {
            lastspace = i;
        }
    }

    /* two-liners look better when split in the middle */
    if (lines == 2 && beauty) {
        char *lo, *hi;

        home[lastbreak] = ' ';  /* undo the break found above */
        lo = hi = home + strlen(home) / 2;
        /* walk outwards from the middle until either side hits whitespace */
        while (isgraph((unsigned char)*lo) && isgraph((unsigned char)*hi)) {
            lo--; hi++;
        }
        if (isspace((unsigned char)*lo))
            *lo = '\n';
        else if (isspace((unsigned char)hi[1]))   /* two spaces */
            hi[1] = '\n';
        else
            *hi = '\n';
    }
    return lines;
}
void read_cookies(FILE *fp)
/* First pass over the input: read all cookies from <fp> into <clist>.
   Lines are collected in <cbuf> until a line starting with "%%" ends the
   cookie.  A cookie with at least one line is stored as a malloc()ed copy
   (its compress()ed shadow with -x, reformat()ed with -f<len>) together
   with its ordinal number in the file; a cookie with no lines is only
   counted as "empty".  Text after the last "%%" line is not stored.
   <clist> grows by half whenever it is full.
   Exits with code 20 if memory runs out or a cookie does not fit <cbuf>. */
{
    long cbuflen = 0;   /* characters collected in <cbuf> so far */
    long ignored = 0;   /* empty cookies skipped */
    int lines = 0;      /* lines collected for the current cookie */

    printf("reading cookies"); fflush(stdout);
    cbuf[0] = '\0';
    while (fgets(line, LBUFSIZE, fp)) {
        if (strncmp(line, "%%", 2) == 0) {   /* "end of cookie"-marker */
            if (lines > 0) {                 /* store the cookie */
                char *copy;

                if (extended)
                    compress(cbuf);          /* store its shadow only */
                else if (formatlen > 0)
                    reformat(cbuf, formatlen);
                copy = malloc(strlen(cbuf) + 1);
                if (copy == NULL) {
                    printf("\nout of memory\n");
                    exit(20);
                }
                strcpy(copy, cbuf);
                clist[listed].text = copy;
                clist[listed].sorthook = copy;
                clist[listed].number = listed + ignored;
                if (++listed == listsize) {
                    void *grown;

                    listsize = 3 * listsize / 2;
                    /* keep the old pointer until realloc() has succeeded */
                    grown = realloc(clist, listsize * sizeof(struct cookie));
                    if (grown == NULL) {
                        printf("\nlist reallocation failed\n");
                        exit(20);
                    }
                    clist = grown;
                }
            } else {
                ignored++;                   /* or ignore it */
            }
            /* start a new one */
            cbuf[0] = '\0'; lines = 0; cbuflen = 0;
        } else {
            if (formatlen < 0)
                reformat(line, LBUFSIZE);
            if ((cbuflen += strlen(line)) >= CBUFSIZE) {
                /* CBUFSIZE is an int constant; cast it to match %ld */
                printf("\ncookie too big (>%ld chars)\n", (long)CBUFSIZE);
                exit(20);
            }
            strcat(cbuf, line); lines++;
        }
    }
    printf(", done. (%ld read, %ld empty)\n", listed, ignored);
}
void set_hooks(void)
/* Adjust the sorthook of every listed cookie for the final sort, according
   to the global <finalsort>:
     2: first alphanumeric character of the cookie (or its end if none),
     3: first alphanumeric character on the last line that has one,
     4: first character of the last alphanumeric word.
   For any other value, or when no alphanumeric character is found in modes
   3 and 4, the hook is left unchanged.
   <ctype.h> arguments are cast to unsigned char, as a negative char value
   is undefined there. */
{
    long l;

    printf("adjusting sort hooks"); fflush(stdout);
    for (l = 0; l < listed; l++) {
        char *s = clist[l].text;
        int hot = 1;    /* may the next alphanumeric become the hook? */

        switch (finalsort) {
        case 2:         /* get a pointer to the first alphanumeric */
            while (*s && !isalnum((unsigned char)*s))
                s++;
            clist[l].sorthook = s;
            break;
        case 3:         /* first alphanumeric on the last line */
            for (; *s; s++) {
                if (*s == '\n')
                    hot = 1;
                if (hot && isalnum((unsigned char)*s)) {
                    clist[l].sorthook = s;
                    hot = 0;
                }
            }
            break;
        case 4:         /* first alphanumeric in the last word */
            for (; *s; s++) {
                if (!isalnum((unsigned char)*s)) {
                    hot = 1;
                } else if (hot) {
                    clist[l].sorthook = s;
                    hot = 0;
                }
            }
            break;
        default:        /* a label must be followed by a statement */
            break;
        }
    }
    printf(", done.\n");
}
void write_cookies(FILE *fp)
/* Write every listed cookie to <fp>, each followed by a "%%" line.
   The text of each cookie is freed right after it has been written. */
{
    long k = 0;

    printf("writing cookies"); fflush(stdout);
    while (k < listed) {
        char *text = clist[k].text;

        fputs(text, fp);
        fputs("%%\n", fp);
        free(text);
        k++;
    }
    printf(", done.\n");
}
void filter_cookies(FILE *fp1, FILE *fp2)
/* Second pass, a combination of read_cookies() and write_cookies():
   re-read the input <fp1> from the start and copy to <fp2> exactly those
   cookies whose ordinal number matches the next entry of <clist>.  Each
   list entry's text is freed as soon as its cookie has been copied.
   Reading stops once every listed entry has been handled. */
{
    long seen = 0;   /* ordinal of the cookie being assembled */
    long kept = 0;   /* index of the next entry of <clist> to match */

    rewind(fp1);
    printf("copying cookies"); fflush(stdout);
    cbuf[0] = '\0';
    for (;;) {
        if (!fgets(line, LBUFSIZE, fp1))
            break;
        if (kept >= listed)
            break;

        if (strncmp(line, "%%", 2) != 0) {
            /* ordinary line: append it to the current cookie */
            if (formatlen < 0)
                reformat(line, LBUFSIZE);
            /* no test for "monster cookies" needed in the 2nd pass */
            strcat(cbuf, line);
            continue;
        }

        /* "end of cookie"-marker */
        if (seen == clist[kept].number) {   /* "good" cookie */
            if (formatlen > 0)
                reformat(cbuf, formatlen);
            fputs(cbuf, fp2);               /* copy it */
            fputs("%%\n", fp2);
            free(clist[kept].text);
            kept++;
        }
        /* start a new one */
        seen++;
        cbuf[0] = '\0';
    }
    printf(", done. (%ld copied)\n", kept);
}
void sift(struct cookie v[], long i, long m, int mode)
/* centre routine to heapsort() */
/* mode==0: sort by name, mode==1: sort by number */
{
long j;
struct cookie temp;
if (mode==0) { /* by name */
while ((j = 2*(i+1)-1) <= m) {
if (j < m && (str_cmp(v[j].sorthook, v[j+1].sorthook) < 0) )
j++;
if (str_cmp(v[i].sorthook, v[j].sorthook) < 0) {
temp = v[i]; v[i] = v[j]; v[j] = temp;
i = j;
} else
i = m; /* done */
}
} else { /* by number */
while ((j = 2*(i+1)-1) <= m) {
if (j < m && (v[j].number < v[j+1].number) )
j++;
if (v[i].number < v[j].number) {
temp = v[i]; v[i] = v[j]; v[j] = temp;
i = j;
} else
i = m; /* done */
}
}
}
void my_heapsort(struct cookie v[], long n, int mode)
/* mode==0: sort by name, mode==1: sort by number */
{
long i;
struct cookie temp;
if (n<2) /* no sorting necessary */
return;
for (i = n/2-1; i >= 0; i--)
sift(v, i, n-1, mode);
for (i = n-1; i >= 1; i--) {
temp = v[0]; v[0] = v[i]; v[i] = temp;
sift(v, 0, i-1, mode);
}
}
void one_cookie(void)
/* Remove double entries from <clist>, then put the list into its final
   order: sorted by name if <finalsort> is set (after set_hooks() for
   modes above 1), otherwise back in input order. */
{
    long removed = 0;
    long top, low;

    printf("removing double entries"); fflush(stdout);
    my_heapsort(clist, listed, 0);

    /* Equal texts are now neighbours.  Walk down from the end; every
       duplicate below <top> is freed and its slot refilled with the
       current last entry of the list. */
    top = listed - 1;
    while (top > 0) {
        low = top - 1;
        while (low >= 0 && str_cmp(clist[top].text, clist[low].text) == 0) {
            free(clist[low].text);
            listed--;
            clist[low] = clist[listed];
            removed++;
            low--;
        }
        top = low;
    }
    printf(", done. (%ld found)\n", removed);

    if (!finalsort) {
        printf("restoring order"); fflush(stdout);
        my_heapsort(clist, listed, 1);
    } else {
        if (finalsort > 1)
            set_hooks();
        printf("sorting"); fflush(stdout);
        my_heapsort(clist, listed, 0);
    }
    printf(", done.\n");
}
int main(int argc, char *argv[])
/* Parse the command line, read the cookies from the input file, remove the
   doubles and write the result to the output file.  When no output file is
   named, "<inputfile>.crunch" is used.
   Returns 0 on success, 5 for usage errors and 10 if a file cannot be
   opened or written; out-of-memory conditions end the program with 20.
   File names are used straight from argv (the default output name is
   allocated to fit) instead of being copied into fixed 100-byte buffers,
   which a long argument could overflow. */
{
    char *s;
    const char *inname = "";    /* first non-option argument */
    const char *outname = "";   /* last further non-option argument */
    char *defname = NULL;       /* allocated "<inname>.crunch", if needed */
    FILE *infile, *outfile;
    int rc = 0;

    if (argc < 2) {
        help(NULL);
        return 5;
    }
    while (--argc) {
        s = *++argv;            /* from here on argv[0] is the current argument */
        if (*s != '-') {
            if (inname[0] == '\0')
                inname = s;
            else
                outname = s;
        } else {
            switch (*++s) {
            case 'f':
                formatlen = -1; /* flag to reformat line by line only */
                if (isdigit((unsigned char)*++s))
                    formatlen = atoi(s);    /* cookie by cookie */
                break;
            case 'b':
                beauty = 1;
                break;
            case 's': case 'S':
                if (extended)
                    printf("warning: -x disabled by %s\n", argv[0]);
                extended = 0; case_sense = (*s == 'S'); finalsort = 2;
                if (isdigit((unsigned char)*++s))
                    finalsort = atoi(s) + 1;
                break;
            case 'x':
                if (finalsort)
                    printf("warning: -s/-S disabled by -x\n");
                finalsort = 0; extended = 1;
                break;
            default:
                help(argv[0]);
                return 5;
            }
        }
    }
    if (inname[0] == '\0') {
        help(NULL);
        return 5;
    }
    if (outname[0] == '\0') {
        /* sizeof ".crunch" already counts the terminating '\0' */
        defname = malloc(strlen(inname) + sizeof ".crunch");
        if (defname == NULL) {
            printf("out of memory\n");
            return 20;
        }
        strcpy(defname, inname);
        strcat(defname, ".crunch");
        outname = defname;
    }
    /* Opening the output truncates it, so writing to the input file would
       destroy the data before it is read.  Only identical names are
       caught here, not two different names for the same file. */
    if (strcmp(inname, outname) == 0) {
        printf("Input and output file must not be the same!\n");
        free(defname);
        return 10;
    }
    if (!(infile = fopen(inname, "r"))) {
        printf("Can't open %s for input!\n", inname);
        free(defname);
        return 10;
    }
    if (!(outfile = fopen(outname, "w"))) {
        printf("Can't open %s for output!\n", outname);
        fclose(infile);
        free(defname);
        return 10;
    }
    clist = malloc(listsize * sizeof(struct cookie));
    if (!clist) {
        printf("list allocation failed\n");
        exit(20);
    }
    read_cookies(infile);
    one_cookie();
    if (extended)
        filter_cookies(infile, outfile);
    else
        write_cookies(outfile);
    free(clist);
    fclose(infile);
    /* buffered output may fail as late as fclose(); check both */
    if (ferror(outfile))
        rc = 10;
    if (fclose(outfile) != 0)
        rc = 10;
    if (rc != 0)
        printf("Error writing %s!\n", outname);
    free(defname);
    return rc;
}